# Prints the AUC results as xtable tables for use in the manuscript (MS)

library(ggplot2)
library(reshape)
library(ggthemes)
library(scales)
library(dplyr)
library(Hmisc)
library(xtable)

# Load the saved AUC results. The code below relies on these files providing
# the data frames `AUC.checkerboard` and `AUC.lat`, each with the columns
# Model (a factor), speciesName, Eval and AUC.
# NOTE(review): presumably one row per model/species/evaluation/fold -- confirm
# against the script that writes these .RData files.
load("Output/AUCresult - checkerboard.RData")
load("Output/AUCresult - lat.RData")

#================overall external AUC comparison for checkerboard===================#
# Keep only the external evaluation and average AUC per model and species.
AUC.checkerboard.ex <- AUC.checkerboard %>% dplyr::filter(Eval == "external")
AUC.checkerboard.ex.mean <- aggregate(AUC ~ Model + speciesName,
                                      data = AUC.checkerboard.ex, FUN = "mean")

# The table is laid out by the factor levels of Model, so Model must be a
# factor; the table size follows the number of levels instead of a fixed 14.
stopifnot(is.factor(AUC.checkerboard.ex.mean$Model))
checkerboard.models <- levels(AUC.checkerboard.ex.mean$Model)
n.checkerboard.models <- length(checkerboard.models)

# Species x model matrix of mean AUC. Each row is one species, so two columns
# are always paired by species, regardless of the row order of the aggregated
# data frame. Missing species/model combinations show up as NA.
checkerboard.auc.wide <- tapply(AUC.checkerboard.ex.mean$AUC,
                                list(AUC.checkerboard.ex.mean$speciesName,
                                     AUC.checkerboard.ex.mean$Model),
                                mean)

# Pairwise model comparison:
#   below the diagonal: p-value of the paired Wilcoxon test (row vs column)
#   above the diagonal: mean AUC difference (row model minus column model)
#   diagonal:           NA
table2 <- matrix(NA_real_,
                 nrow = n.checkerboard.models, ncol = n.checkerboard.models,
                 dimnames = list(checkerboard.models, checkerboard.models))

for (i in seq_len(n.checkerboard.models)) {
    for (j in seq_len(n.checkerboard.models)) {
        auc.row <- checkerboard.auc.wide[, checkerboard.models[i]]
        auc.col <- checkerboard.auc.wide[, checkerboard.models[j]]
        if (j < i) {
            # wilcox.test() drops incomplete pairs itself when paired = TRUE
            table2[i, j] <- wilcox.test(auc.row, auc.col, paired = TRUE)$p.value
        } else if (j > i) {
            # na.rm = TRUE keeps the mean on the same complete pairs as the test
            table2[i, j] <- mean(auc.row - auc.col, na.rm = TRUE)
        }
    }
}

# print it (explicit print() so the table also appears when run via source())
print(xtable(table2, digits = 2))


#=================================best CV models and consistency with best external models====================================
# Subset to just the individual models.
# NOTE(review): this assumes the first 8 levels of Model are the individual
# models -- confirm against the level order set where the results are created.
AUC.checkerboard.ind <- AUC.checkerboard %>%
    dplyr::filter(Model %in% levels(AUC.checkerboard$Model)[1:8])

# Average across folds.
AUC.checkerboard.ind.mean <- aggregate(AUC ~ Model + speciesName + Eval,
                                       data = AUC.checkerboard.ind, FUN = "mean")

# Best model per species and evaluation type. which.max() keeps exactly one
# row per group even when AUC values tie (top_n() would return every tied row
# and break the one-row-per-species layout of the table below).
AUC.checkerboard.ind.best <- AUC.checkerboard.ind.mean %>%
    group_by(speciesName, Eval) %>%
    dplyr::filter(seq_along(AUC) == which.max(AUC)) %>%
    ungroup()

# Returns, for every row of `best`, the mean external AUC of that row's
# species/model combination as found in `external.mean`. Stops if any
# combination has no external AUC.
lookupExternalAUC <- function(best, external.mean) {
    key.best <- paste(best$speciesName, best$Model, sep = " / ")
    key.external <- paste(external.mean$speciesName, external.mean$Model, sep = " / ")
    idx <- match(key.best, key.external)
    if (anyNA(idx)) {
        stop("no external AUC found for: ",
             paste(key.best[is.na(idx)], collapse = "; "), call. = FALSE)
    }
    external.mean$AUC[idx]
}

AUC.checkerboard.ind.best.external <- AUC.checkerboard.ind.best %>%
    dplyr::filter(Eval == "external")

# External AUC actually achieved by the model that random CV picked as best.
AUC.checkerboard.ind.best.random <- AUC.checkerboard.ind.best %>%
    dplyr::filter(Eval == "random")
AUC.checkerboard.ind.best.random$externalAUC <-
    lookupExternalAUC(AUC.checkerboard.ind.best.random, AUC.checkerboard.ex.mean)

# External AUC actually achieved by the model that block CV picked as best.
AUC.checkerboard.ind.best.block <- AUC.checkerboard.ind.best %>%
    dplyr::filter(Eval == "block")
AUC.checkerboard.ind.best.block$externalAUC <-
    lookupExternalAUC(AUC.checkerboard.ind.best.block, AUC.checkerboard.ex.mean)

# The table combines the three data sets column-wise, so their rows must
# describe the same species in the same order.
stopifnot(
    identical(as.character(AUC.checkerboard.ind.best.external$speciesName),
              as.character(AUC.checkerboard.ind.best.random$speciesName)),
    identical(as.character(AUC.checkerboard.ind.best.external$speciesName),
              as.character(AUC.checkerboard.ind.best.block$speciesName))
)

# "AUC loss" = best external AUC minus the external AUC of the CV-selected model.
table3 <- cbind.data.frame(as.character(AUC.checkerboard.ind.best.external$speciesName),
                           as.character(AUC.checkerboard.ind.best.random$Model),
                           AUC.checkerboard.ind.best.external$AUC - AUC.checkerboard.ind.best.random$externalAUC,
                           as.character(AUC.checkerboard.ind.best.block$Model),
                           AUC.checkerboard.ind.best.external$AUC - AUC.checkerboard.ind.best.block$externalAUC,
                           as.character(AUC.checkerboard.ind.best.external$Model),
                           AUC.checkerboard.ind.best.external$AUC)
colnames(table3) <- c("Species", "Random", "AUC loss", "Block", "AUC loss", "External best", "Best external AUC")

# print it (explicit print() so the table also appears when run via source())
print(xtable(table3, digits = 2))

#==============================do the same for lat====================================

#================overall external AUC comparison for lat===================#
# Keep only the external evaluation and average AUC per model and species.
AUC.lat.ex <- AUC.lat %>% dplyr::filter(Eval == "external")
AUC.lat.ex.mean <- aggregate(AUC ~ Model + speciesName, data = AUC.lat.ex, FUN = "mean")

# The table is laid out by the factor levels of Model, so Model must be a
# factor; the table size follows the number of levels instead of a fixed 14.
stopifnot(is.factor(AUC.lat.ex.mean$Model))
lat.models <- levels(AUC.lat.ex.mean$Model)
n.lat.models <- length(lat.models)

# Species x model matrix of mean AUC, so that two columns are always paired by
# species regardless of the row order of the aggregated data frame. Missing
# species/model combinations show up as NA.
lat.auc.wide <- tapply(AUC.lat.ex.mean$AUC,
                       list(AUC.lat.ex.mean$speciesName, AUC.lat.ex.mean$Model),
                       mean)

# do mean diff and pairwise tally (n = 4 so no statistical tests):
#   below the diagonal: number of species for which the row model's AUC is
#                       >= the column model's AUC
#   above the diagonal: mean AUC difference (row minus column), 2 decimals
#   diagonal:           NA
# Cells are stored as text because counts and formatted differences are mixed.
table4 <- matrix(NA_character_, nrow = n.lat.models, ncol = n.lat.models,
                 dimnames = list(lat.models, lat.models))

for (i in seq_len(n.lat.models)) {
    for (j in seq_len(n.lat.models)) {
        auc.row <- lat.auc.wide[, lat.models[i]]
        auc.col <- lat.auc.wide[, lat.models[j]]
        if (j < i) {
            table4[i, j] <- as.character(sum(auc.row >= auc.col, na.rm = TRUE))
        } else if (j > i) {
            table4[i, j] <- sprintf("%.2f", round(mean(auc.row - auc.col, na.rm = TRUE), 2))
        }
    }
}

# print it (explicit print() so the table also appears when run via source())
print(xtable(table4, digits = 2))


#=================================best CV models and consistency with best external models====================================
# Subset to just the individual models.
# NOTE(review): this assumes the first 8 levels of Model are the individual
# models -- confirm against the level order set where the results are created.
AUC.lat.ind <- AUC.lat %>%
    dplyr::filter(Model %in% levels(AUC.lat$Model)[1:8])

# Average across folds.
AUC.lat.ind.mean <- aggregate(AUC ~ Model + speciesName + Eval,
                              data = AUC.lat.ind, FUN = "mean")

# Best model per species and evaluation type. which.max() keeps exactly one
# row per group even when AUC values tie (top_n() would return every tied row
# and break the one-row-per-species layout of the table below).
AUC.lat.ind.best <- AUC.lat.ind.mean %>%
    group_by(speciesName, Eval) %>%
    dplyr::filter(seq_along(AUC) == which.max(AUC)) %>%
    ungroup()

# Returns, for every row of `best`, the mean external AUC of that row's
# species/model combination as found in `external.mean`. Stops if any
# combination has no external AUC.
lookupExternalAUC <- function(best, external.mean) {
    key.best <- paste(best$speciesName, best$Model, sep = " / ")
    key.external <- paste(external.mean$speciesName, external.mean$Model, sep = " / ")
    idx <- match(key.best, key.external)
    if (anyNA(idx)) {
        stop("no external AUC found for: ",
             paste(key.best[is.na(idx)], collapse = "; "), call. = FALSE)
    }
    external.mean$AUC[idx]
}

AUC.lat.ind.best.external <- AUC.lat.ind.best %>%
    dplyr::filter(Eval == "external")

# External AUC actually achieved by the model that random CV picked as best.
AUC.lat.ind.best.random <- AUC.lat.ind.best %>%
    dplyr::filter(Eval == "random")
AUC.lat.ind.best.random$externalAUC <-
    lookupExternalAUC(AUC.lat.ind.best.random, AUC.lat.ex.mean)

# External AUC actually achieved by the model that block CV picked as best.
AUC.lat.ind.best.block <- AUC.lat.ind.best %>%
    dplyr::filter(Eval == "block")
AUC.lat.ind.best.block$externalAUC <-
    lookupExternalAUC(AUC.lat.ind.best.block, AUC.lat.ex.mean)

# The table combines the three data sets column-wise, so their rows must
# describe the same species in the same order.
stopifnot(
    identical(as.character(AUC.lat.ind.best.external$speciesName),
              as.character(AUC.lat.ind.best.random$speciesName)),
    identical(as.character(AUC.lat.ind.best.external$speciesName),
              as.character(AUC.lat.ind.best.block$speciesName))
)

# "AUC loss" = best external AUC minus the external AUC of the CV-selected model.
table5 <- cbind.data.frame(as.character(AUC.lat.ind.best.external$speciesName),
                           as.character(AUC.lat.ind.best.random$Model),
                           AUC.lat.ind.best.external$AUC - AUC.lat.ind.best.random$externalAUC,
                           as.character(AUC.lat.ind.best.block$Model),
                           AUC.lat.ind.best.external$AUC - AUC.lat.ind.best.block$externalAUC,
                           as.character(AUC.lat.ind.best.external$Model),
                           AUC.lat.ind.best.external$AUC)
colnames(table5) <- c("Species", "Random", "AUC loss", "Block", "AUC loss", "External best", "Best external AUC")

# print it (explicit print() so the table also appears when run via source())
print(xtable(table5, digits = 2))
